Testing code for the final project

To accomplish:

-Looking through the datasets we have available -Need Google Trend data for search terms such as “Cruz” vs “Rourke” or “midterm election” to see if number of searches correlate with # votes for each candidate or # registered voters -What kind of analysis do we want to see -Review the sample final projects

Some links to keep in mind: https://www.cnn.com/election/2018/exit-polls/texas/senate https://www.texastribune.org/2018/10/31/ut-tt-poll-texans-say-immigration-border-security-top-issues/

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.0.0     ✔ purrr   0.2.5
## ✔ tibble  1.4.2     ✔ dplyr   0.7.6
## ✔ tidyr   0.8.1     ✔ stringr 1.3.1
## ✔ readr   1.1.1     ✔ forcats 0.3.0
## ── Conflicts ────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(rvest)
## Loading required package: xml2
## 
## Attaching package: 'rvest'
## The following object is masked from 'package:purrr':
## 
##     pluck
## The following object is masked from 'package:readr':
## 
##     guess_encoding
library(httr)
library(ggridges)
## 
## Attaching package: 'ggridges'
## The following object is masked from 'package:ggplot2':
## 
##     scale_discrete_manual
library(tigris)
## To enable 
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
## 
## Attaching package: 'tigris'
## The following object is masked from 'package:graphics':
## 
##     plot
library(leaflet)
library(dplyr)
library(purrr)

scraping election results from the web

New York Times voting results by county

url = "https://www.nytimes.com/elections/results/texas-senate"
nytimes_data = read_html(url, col_types = "ccdd")

nytimes_data
## {xml_document}
## <html lang="en" itemscope="" xmlns:og="http://opengraphprotocol.org/schema/" itemtype="http://schema.org/NewsArticle">
## [1] <head>\n<title>Texas Senate Election Results: Beto O’Rourke vs. Ted  ...
## [2] <body class="eln-race-page eln-2018-11-06 eln-forecast">\n<script ty ...
nytimes_data %>% 
  html_nodes(css = "table")
## {xml_nodeset (2)}
## [1] <table class="eln-table eln-results-table">\n<thead><tr>\n<th class= ...
## [2] <table class="eln-table eln-county-table">\n<thead><tr>\n<th class=" ...

This seems to have created two tables from the website data.

table_overall = (nytimes_data %>% html_nodes(css = "table")) %>% 
  .[[1]] %>%
  html_table()

Tidying overall table for exploratory analysis

#remove the commas in the numbers first
table_overall$Votes <- gsub(",","",table_overall$Votes)
table_overall$Pct. <- gsub("%","",table_overall$Pct.)

#convert votes and percent to numeric
table_overall = 
  table_overall %>% 
  mutate(Votes = as.numeric(Votes), Pct. = as.numeric(Pct.))

Bar Plot of Votes per Candidate

bar_overall =
  table_overall %>% 
  mutate(Candidate = as.factor(Candidate)) %>% 
  mutate(Candidate = forcats::fct_reorder(Candidate, Votes)) %>% 
  ggplot(aes(x = Candidate, y = Votes, fill = Party)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Number of Votes per Candidate",
    x = "Candidate",
    y = "Number of Votes"
  ) +
  scale_x_discrete(labels = c("Cruz* Ted Cruz*" = "Ted Cruz", "O'Rourke Beto O'Rourke" = "Beto O'Rourke",
                              "Dikeman Neal Dikeman" = "Neal Dikeman"))
  
bar_overall

Made the first table that which we have final results for the state of texas.

table_county =  (nytimes_data %>% html_nodes(css = "table")) %>% 
  .[[2]] %>%
  html_table() %>% 
  slice(1:(n() - 1)) %>% 
  select(-Rpt.) %>% 
  janitor::clean_names()

Tidying county table

#remove the commas in the numbers first
table_county$cruz <- gsub(",","",table_county$cruz)
table_county$o_rourke <- gsub(",","",table_county$o_rourke)
table_county$dikeman <- gsub(",","",table_county$dikeman)

table_county = 
  table_county %>% 
  mutate(cruz = as.numeric(cruz), o_rourke = as.numeric(o_rourke), dikeman = as.numeric(dikeman))

table_county$county = str_replace(table_county$county,"La Vaca", "Lavaca")
table_county$county = str_replace(table_county$county,"De Witt", "DeWitt")
#county names misspelled (was causing issues with merging later on)
table_county_long = gather(table_county, key = candidate, value = votes, cruz:dikeman)

Made the second table which has all of the 254 county level data for Texas!

Plots for all counties

point_county =
  table_county_long %>% 
  filter(candidate != "dikeman") %>% 
  ggplot(aes(x = county, y = votes, color = candidate)) +
  geom_point() +
  labs(
    title = "Number of Votes for Beto vs Cruz",
    x = "County",
    y = "Number of Votes"
  ) +
  theme(axis.text.x = element_text(angle = 90), text = element_text(size = 3))
  
point_county

ggplot(table_county_long, aes(x = candidate, y = votes)) + geom_boxplot()

ggplot(table_county_long, aes(x = votes, y = candidate)) + 
  geom_density_ridges(scale = .85)
## Picking joint bandwidth of 1080

comparing these county level election results to highly searched voter election interests in google

-using search terms “Midterms” and selecting dataset from top result

district_searches = read_csv(file =  "./data/Search_Data_US_Congressional_District_26Sep2018.csv")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   District = col_character(),
##   Code = col_character(),
##   State = col_character(),
##   FIRST = col_character(),
##   SECOND = col_character(),
##   THIRD = col_character(),
##   FOURTH = col_character(),
##   FIFTH = col_character(),
##   SIXTH = col_character(),
##   SEVENTH = col_character(),
##   EIGHTH = col_character(),
##   NINTH = col_character(),
##   TENTH = col_character(),
##   `Maternity leave in the United States` = col_integer(),
##   `Single-payer healthcare` = col_integer(),
##   `Tax Cuts and Jobs Act of 2017` = col_integer(),
##   `Transgender people in the military` = col_integer()
## )
## See spec(...) for full column specifications.
TX_searches = 
  district_searches %>% janitor::clean_names() %>% 
  filter(state == "TX")

TX_counts = 
  TX_searches %>% 
  count(fifth)


#the topics that were searched most were health care, immigration, mental health, united nations
#second most searched: immigration, health care, Medicare, Medicaid, capital punishment
#third most searched: Medicare, Medicaid, September 11 attacks, immigration, 
#fourth most searched: Medicaid, Medicare, Immigration...
#fifth: Medicaid, mental health, Medicare, September 11 attacks...
#Thus, we should focus on the variables health care, immigration, Medicare, Medicaid, Mental health, September 11 attacks

uploading county congressional district txt file

congress_district = read.csv(file = "./data/congress_district2.csv") 

congress_district$county_name = str_replace(congress_district$county_name,"Sterlin", "Sterling")
congress_district$county_name = str_replace(congress_district$county_name,"MuCulloch","McCulloch")
#misspelled counties discovered while exploring data when merging later on
TX_searches =
  TX_searches %>% 
  separate(code, into = c("remove_1", "district_num"), sep = "-") %>% 
  mutate(district_num = as.numeric(district_num)) %>% 
  select(district_num, most_searched = first, x2003_invasion_of_iraq:womens_health) 

nested_congress =
  congress_district %>% 
  nest(county_name) 
    
merged_searches= merge(TX_searches, nested_congress, by="district_num", all=TRUE) %>% 
  unnest() 

#rename variable county_name
merged_searches = merged_searches %>% 
  select(district_num, county=county_name, everything())

merged_nyt_searches = merge(merged_searches, table_county, by= "county", all=TRUE)

plot for topics search per county

Still figuring out how to display this ideas: 1) interactive barchart in the current long format, if we use shiny we could show how the top topics vary among counties through use of drop-down menu to select county, etc. 2) figure out we can juxtapose how the counties voted vs. topics. Use of plotly for interactivity? 3) Alternative to first option, how can we show the distribution of topics among counties instead? 4)focus on 5 biggest counties or districts? but this would be biased as it may be a metropolitan area

merged_nyt_searches_long = gather(merged_nyt_searches, key = topics, value = search_interest, health_care:september_11_attacks)

comparing these county level election results to search results by candidate

map

merged_nyt_searches = read_csv(file ="./data/merged_nyt_searches.csv") #previously merged_nyt_searches dataframe into a csv file
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   county = col_character(),
##   district_num = col_integer(),
##   most_searched = col_character(),
##   gender_workplace_diversity = col_integer(),
##   maternity_leave_in_the_united_states = col_integer(),
##   single_payer_healthcare = col_integer(),
##   tax_cuts_and_jobs_act_of_2017 = col_integer(),
##   transgender_people_in_the_military = col_integer(),
##   cruz = col_integer(),
##   o_rourke = col_integer(),
##   dikeman = col_integer()
## )
## See spec(...) for full column specifications.
gis = merged_nyt_searches %>% 
  select(county, most_searched, cruz, o_rourke, dikeman) %>% 
  mutate(cruz_orour_rat = cruz/o_rourke,
         status = as.numeric(cruz_orour_rat>=1),
         winner = ifelse(status==1, "Ted Cruz", "Beto O'Rourke"))

gis = unique(gis) 

gis = gis %>% 
nest(most_searched) 

listcollapse = function(x){
  paste(unlist(x), collapse = ", ")
}

gis = gis %>% 
  mutate( most_searched = map(data, listcollapse)) %>% 
  select(-data)

Cleaning data so as to merge with GIS…

texas_counties = counties(state = "48",year = "2017") #Texas fips code is 48, using most recent census data, 2017
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |                                                                 |   1%
  |                                                                       
  |=                                                                |   1%
  |                                                                       
  |=                                                                |   2%
  |                                                                       
  |==                                                               |   2%
  |                                                                       
  |==                                                               |   3%
  |                                                                       
  |==                                                               |   4%
  |                                                                       
  |===                                                              |   4%
  |                                                                       
  |===                                                              |   5%
  |                                                                       
  |====                                                             |   5%
  |                                                                       
  |====                                                             |   6%
  |                                                                       
  |====                                                             |   7%
  |                                                                       
  |=====                                                            |   7%
  |                                                                       
  |=====                                                            |   8%
  |                                                                       
  |======                                                           |   8%
  |                                                                       
  |======                                                           |   9%
  |                                                                       
  |======                                                           |  10%
  |                                                                       
  |=======                                                          |  10%
  |                                                                       
  |=======                                                          |  11%
  |                                                                       
  |=======                                                          |  12%
  |                                                                       
  |========                                                         |  12%
  |                                                                       
  |========                                                         |  13%
  |                                                                       
  |=========                                                        |  13%
  |                                                                       
  |=========                                                        |  14%
  |                                                                       
  |=========                                                        |  15%
  |                                                                       
  |==========                                                       |  15%
  |                                                                       
  |==========                                                       |  16%
  |                                                                       
  |===========                                                      |  16%
  |                                                                       
  |===========                                                      |  17%
  |                                                                       
  |===========                                                      |  18%
  |                                                                       
  |============                                                     |  18%
  |                                                                       
  |============                                                     |  19%
  |                                                                       
  |=============                                                    |  19%
  |                                                                       
  |=============                                                    |  20%
  |                                                                       
  |=============                                                    |  21%
  |                                                                       
  |==============                                                   |  21%
  |                                                                       
  |==============                                                   |  22%
  |                                                                       
  |===============                                                  |  22%
  |                                                                       
  |===============                                                  |  23%
  |                                                                       
  |===============                                                  |  24%
  |                                                                       
  |================                                                 |  24%
  |                                                                       
  |================                                                 |  25%
  |                                                                       
  |=================                                                |  25%
  |                                                                       
  |=================                                                |  26%
  |                                                                       
  |=================                                                |  27%
  |                                                                       
  |==================                                               |  27%
  |                                                                       
  |==================                                               |  28%
  |                                                                       
  |===================                                              |  28%
  |                                                                       
  |===================                                              |  29%
  |                                                                       
  |===================                                              |  30%
  |                                                                       
  |====================                                             |  30%
  |                                                                       
  |====================                                             |  31%
  |                                                                       
  |====================                                             |  32%
  |                                                                       
  |=====================                                            |  32%
  |                                                                       
  |=====================                                            |  33%
  |                                                                       
  |======================                                           |  33%
  |                                                                       
  |======================                                           |  34%
  |                                                                       
  |======================                                           |  35%
  |                                                                       
  |=======================                                          |  35%
  |                                                                       
  |=======================                                          |  36%
  |                                                                       
  |========================                                         |  36%
  |                                                                       
  |========================                                         |  37%
  |                                                                       
  |========================                                         |  38%
  |                                                                       
  |=========================                                        |  38%
  |                                                                       
  |=========================                                        |  39%
  |                                                                       
  |==========================                                       |  39%
  |                                                                       
  |==========================                                       |  40%
  |                                                                       
  |==========================                                       |  41%
  |                                                                       
  |===========================                                      |  41%
  |                                                                       
  |===========================                                      |  42%
  |                                                                       
  |============================                                     |  42%
  |                                                                       
  |============================                                     |  43%
  |                                                                       
  |============================                                     |  44%
  |                                                                       
  |=============================                                    |  44%
  |                                                                       
  |=============================                                    |  45%
  |                                                                       
  |==============================                                   |  45%
  |                                                                       
  |==============================                                   |  46%
  |                                                                       
  |==============================                                   |  47%
  |                                                                       
  |===============================                                  |  47%
  |                                                                       
  |===============================                                  |  48%
  |                                                                       
  |================================                                 |  48%
  |                                                                       
  |================================                                 |  49%
  |                                                                       
  |================================                                 |  50%
  |                                                                       
  |=================================                                |  50%
  |                                                                       
  |=================================                                |  51%
  |                                                                       
  |=================================                                |  52%
  |                                                                       
  |==================================                               |  52%
  |                                                                       
  |==================================                               |  53%
  |                                                                       
  |===================================                              |  53%
  |                                                                       
  |===================================                              |  54%
  |                                                                       
  |===================================                              |  55%
  |                                                                       
  |====================================                             |  55%
  |                                                                       
  |====================================                             |  56%
  |                                                                       
  |=====================================                            |  56%
  |                                                                       
  |=====================================                            |  57%
  |                                                                       
  |=====================================                            |  58%
  |                                                                       
  |======================================                           |  58%
  |                                                                       
  |======================================                           |  59%
  |                                                                       
  |=======================================                          |  59%
  |                                                                       
  |=======================================                          |  60%
  |                                                                       
  |=======================================                          |  61%
  |                                                                       
  |========================================                         |  61%
  |                                                                       
  |========================================                         |  62%
  |                                                                       
  |=========================================                        |  62%
  |                                                                       
  |=========================================                        |  63%
  |                                                                       
  |=========================================                        |  64%
  |                                                                       
  |==========================================                       |  64%
  |                                                                       
  |==========================================                       |  65%
  |                                                                       
  |===========================================                      |  65%
  |                                                                       
  |===========================================                      |  66%
  |                                                                       
  |===========================================                      |  67%
  |                                                                       
  |============================================                     |  67%
  |                                                                       
  |============================================                     |  68%
  |                                                                       
  |=============================================                    |  68%
  |                                                                       
  |=============================================                    |  69%
  |                                                                       
  |=============================================                    |  70%
  |                                                                       
  |==============================================                   |  70%
  |                                                                       
  |==============================================                   |  71%
  |                                                                       
  |==============================================                   |  72%
  |                                                                       
  |===============================================                  |  72%
  |                                                                       
  |===============================================                  |  73%
  |                                                                       
  |================================================                 |  73%
  |                                                                       
  |================================================                 |  74%
  |                                                                       
  |================================================                 |  75%
  |                                                                       
  |=================================================                |  75%
  |                                                                       
  |=================================================                |  76%
  |                                                                       
  |==================================================               |  76%
  |                                                                       
  |==================================================               |  77%
  |                                                                       
  |==================================================               |  78%
  |                                                                       
  |===================================================              |  78%
  |                                                                       
  |===================================================              |  79%
  |                                                                       
  |====================================================             |  79%
  |                                                                       
  |====================================================             |  80%
  |                                                                       
  |====================================================             |  81%
  |                                                                       
  |=====================================================            |  81%
  |                                                                       
  |=====================================================            |  82%
  |                                                                       
  |======================================================           |  82%
  |                                                                       
  |======================================================           |  83%
  |                                                                       
  |======================================================           |  84%
  |                                                                       
  |=======================================================          |  84%
  |                                                                       
  |=======================================================          |  85%
  |                                                                       
  |========================================================         |  85%
  |                                                                       
  |========================================================         |  86%
  |                                                                       
  |========================================================         |  87%
  |                                                                       
  |=========================================================        |  87%
  |                                                                       
  |=========================================================        |  88%
  |                                                                       
  |==========================================================       |  88%
  |                                                                       
  |==========================================================       |  89%
  |                                                                       
  |==========================================================       |  90%
  |                                                                       
  |===========================================================      |  90%
  |                                                                       
  |===========================================================      |  91%
  |                                                                       
  |===========================================================      |  92%
  |                                                                       
  |============================================================     |  92%
  |                                                                       
  |============================================================     |  93%
  |                                                                       
  |=============================================================    |  93%
  |                                                                       
  |=============================================================    |  94%
  |                                                                       
  |=============================================================    |  95%
  |                                                                       
  |==============================================================   |  95%
  |                                                                       
  |==============================================================   |  96%
  |                                                                       
  |===============================================================  |  96%
  |                                                                       
  |===============================================================  |  97%
  |                                                                       
  |===============================================================  |  98%
  |                                                                       
  |================================================================ |  98%
  |                                                                       
  |================================================================ |  99%
  |                                                                       
  |=================================================================|  99%
  |                                                                       
  |=================================================================| 100%
tx_midterms = geo_join(texas_counties, gis, "NAME", "county")
pal = colorFactor(c("blue", "red"), tx_midterms$status)

popup = paste(sep = "<br/>",
              paste0("<b>Winner: </b>", tx_midterms$winner),
              paste0("<b>County: </b>", tx_midterms$county),
              paste0("<b>Votes for Ted Cruz: </b>", tx_midterms$cruz),
              paste0("<b>Votes for Beto O'Rourke: </b>", tx_midterms$o_rourke),
              paste0("<b>Votes for Neal Dikeman: </b>", tx_midterms$dikeman),
              paste0("<b>Most searched topics over one month prior: </b>", tx_midterms$most_searched))
leaflet(tx_midterms) %>% 
  addProviderTiles("CartoDB.Positron") %>% 
  addPolygons(fillColor= ~pal(status),
              color = "black",
              popup = popup,
              fillOpacity = 0.5,
              weight = 0.5) %>% 
  addLegend(position = "bottomleft",
            colors = c("blue", "red"),
            labels = c("Democrat", "Republican"),
            opacity = 1,
            title = "2018 Texas Senate Election Results")